/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice, 
 *     this list of conditions and the following disclaimer. 
 *  2. Redistributions in binary form must reproduce the above copyright 
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution. 
 *  3. The name of the author may not be used to endorse or promote products 
 *     derived from this software without specific prior written
 *     permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/* $Id: dir.c,v 1.25.4.2 2002/04/17 06:12:07 mcnabb Exp $
 *
 * $Log: dir.c,v $
 * Revision 1.25.4.2  2002/04/17 06:12:07  mcnabb
 * Fix another bug in multi-node delete-on-last-close on Linux:
 * If a file is accessed on Node A, renamed on Node B, and then deleted on
 * Node B, an invalidated dcache entry referring to the old name of the
 * file was left over on Node A, preventing the file from being destroyed.
 *
 * Revision 1.25.4.1  2002/03/29 05:02:58  mcnabb
 * Fix bug in multi-node delete-on-last-close on Linux:
 * If a file is deleted on Node A, but still open on Node B, inode token
 * revoke on Node B was marking the dcache entry as invalid, but left it in
 * the cache.  Even after the file was closed, the dcache entry on Node B
 * would stay in the cache until the next lookup operation would find the
 * dentry invalid and call d_invalidate, which might not happen for a long
 * time, if ever.  Since the existence of the dcache entry prevents the
 * file from being destroyed, the mnode token revoke on a deleted file must
 * "uncache" the dentry, i.e., remove it from the Linux dcache hash table,
 * just like d_invalidate would.
 *
 * Revision 1.25  2001/05/30 20:41:27  wyllie
 * Trace inode numbers more consistently
 *
 * Revision 1.24  2001/05/04 23:30:11  schmuck
 * Move "extern struct xxx_operations ..." declarations into .h file
 * instead of replicating them in various .c files.
 * Replace empty gpfs_dops_valid table with a NULL pointer.
 *
 * Revision 1.23  2001/05/04 14:49:17  wyllie
 * Remove gpfs_d_iput, since it only did an iput enclosed in traces.  As of
 * Linux kernel version 2.4.2, gpfs_d_iput should have done an iput_free()
 * anyway.
 *
 * Revision 1.22  2001/05/02 20:45:06  eshel
 * Make sure the all OS nodes are invalidated before auto remount.
 *
 * Revision 1.21  2001/05/02 00:21:22  schmuck
 * Fix another problem found by tortureDir test on Linux:
 * On lookup and create, instantiate the dcache entry while holding the
 * inode lock, or, in case of a negative dcache entry, the directory lock.
 * This closes a window where a token revoke could clear the
 * CO_VFS_REFERENCE flag without invalidating the dcache entry.
 * It also eliminates the need for a d_revalidate operation.
 *
 * Revision 1.20  2001/04/11 20:05:24  dcraft
 * fix dentry instantiation race conditions since no multinode
 * serialization is held.  add d_revalidate capability for negative
 * name dentries.  d_delete calls are the responsibility of the kernel
 * (not gpfs).
 *
 * Revision 1.19  2001/04/08 22:18:28  dcraft
 * Fix multinde delete race conditions.  Still incomplete.
 *
 * Revision 1.18  2001/04/04 21:14:41  dcraft
 * Invalidate inode attributes (particularly i_nlink) when getVattr() can no longer
 * find inode.   Update attributes after rename over an existing file, so d_move
 * will correctly kill target dentry.   Add printing of dentries when "mmfsadm dump vnodes"
 * is executed.  Initial implementation of gpfs_d_revalidate.
 *
 * Revision 1.17  2001/03/29 21:32:56  dixonbp
 * Convert block,dir, and lxtrace to .c files
 *
 * Revision 1.10  2001/03/21 16:01:44  wyllie
 * Opposite of win is not the opposite of tight
 *
 * Revision 1.9  2000/12/18 13:53:14  gjertsen
 * More cleanup of comments/documentation.
 *
 * Revision 1.8  2000/12/15 13:56:38  gjertsen
 * Clean up documentation.
 *
 */

#define __NO_VERSION__

#ifndef __KERNEL__
#define __KERNEL__
#endif

#include <Shark-gpl.h>

#include <linux/fs.h>

#include <cxiSystem.h>
#include <cxiSharedSeg.h>
#include <cxiCred.h>
#include <cxi2gpfs.h>
#include <linux2gpfs.h>
#include <Trace.h>


/* About dcache revalidation:

   The Linux directory cache (dcache) is used to cache the result of name
   lookups.  Linux caches positive as well as negative lookup results in its
   dcache entries (struct dentry): if the file existed at the time the last
   lookup was done (positive lookup), dentry->d_inode will point to the struct
   inode of the file; if the file did not exist (negative lookup),
   dentry->d_inode will be null.

   When a directory is modified on the local node, Linux will update its
   dcache entries accordingly.  When the directory is modified on another
   node, however, we need to invalidate local dcache entries:

    - A negative dcache entry becomes invalid when a file by the same name is
      created on another node.  This requires an exclusive byte-range token on
      the directory block in which the lookup was done that resulted in the
      dcache entry.  Hence, when we lose a byte-range token on a directory, we
      invalidate all negative dcache entries for lookups that were done in
      that directory.  This is done by a call to kxinvalidateOSNode with
      KXIVO_NEGDCACHE, which will result in a call to
      cxiInvalidateNegDCacheEntries() implemented here.

    - A positive dcache entry becomes invalid when the file it refers to
      is deleted, moved, or renamed on another node.  All of these operations
      require an exclusive inode lock.  Hence we invalidate a positive dcache
      entry when we lose the inode token for the file.  This more selective
      invalidation of positive dcache entries is more efficient than simply
      invalidating all dcache entries when we lose a byte-range token on the
      directory.  The invalidation is done by a call to kxinvalidateOSNode
      with CXI_IC_DCACHE, which will result in a call to
      cxiInvalidateDCacheEntry() implemented here.

   To invalidate a dcache entry Linux defines a d_revalidate function in the
   dentry_operations table.  This function is supposed to check whether the
   dcache entry is still valid and return 'true' or 'false' accordingly.
   If no d_revalidate function is given in the dentry_operations table,
   Linux assumes the dentry is valid.  Hence the most efficient way
   of marking a dentry as valid or invalid is to have the d_op field in
   the dentry point to one of two different dentry_operations tables:
   one where the d_revalidate field is NULL (means the dentry is valid),
   and one where d_revalidate points at a function that always returns false
   (means the dentry is invalid). */


/* The following function is called to invalidate dcache entries for one file
 * when we lose the inode token for that file.  Since d_prune_aliases() makes
 * a dput call on every dentry, we must be prepared for the resulting iput()
 * calls and, consequently, for calls to gpfs_s_put_inode().
 *
 * Caller must have a reference on cxiNode_t to ensure it doesn't go
 * away during processing.
 */
/* Interface summary for cxiInvalidateDCacheEntry:
 *
 *   cnP  - node whose osNodeP field holds the struct inode to process.
 *          cnP->destroyIfDelInode selects whether dentries that are still
 *          in use are additionally removed from the dcache hash list.
 *
 * Returns the number of dentries found on the inode's i_dentry list during
 * the final (completed) traversal.  Each of them has had its d_op pointer
 * switched to gpfs_dops_invalid.
 */
int
cxiInvalidateDCacheEntry(cxiNode_t *cnP)
{
  struct inode *iP = (struct inode *)cnP->osNodeP;
  struct list_head *aliasHeadP = &iP->i_dentry;
  struct list_head *linkP;
  struct dentry *dentryP;
  int nAliases;
  int useCount;
  int mustRetry;

  TRACE2(TRACE_VNODE, 11, TRCID_INVAL_DCACHE,
         "cxiInvalidateDCacheEntry: iP 0x%lX i_ino %d",
         iP, iP->i_ino);

  do
  {
    /* Every pass starts from scratch: the count is reset and the kernel
       is asked again to discard the aliases that nobody is using.  This
       is done before dcache_lock is taken. */
    mustRetry = 0;
    nAliases = 0;
    d_prune_aliases(iP);

    /* Walk whatever aliases survived the prune. */
    spin_lock(&dcache_lock);
    for (linkP = aliasHeadP->next; linkP != aliasHeadP; linkP = linkP->next)
    {
      nAliases++;

      /* Pointing d_op at gpfs_dops_invalid marks the entry invalid: that
         table's d_revalidate function always returns false. */
      dentryP = list_entry(linkP, struct dentry, d_alias);
      dentryP->d_op = &gpfs_dops_invalid;

      /* For a file that has not been deleted, the mark is all we need. */
      if (!cnP->destroyIfDelInode)
        continue;

      /* Nothing more to do if the entry is already out of the hash table. */
      if (list_empty(&dentryP->d_hash))
        continue;

      /* The file was deleted but this dentry is still hashed, so it is
         expected to be in use (non-zero d_count); otherwise
         d_prune_aliases would have taken care of it.  An entry that is
         merely marked invalid would linger in the cache after the last
         close until some later lookup happened to find it, which may be
         never.  The file should be destroyed as soon as it is closed, so
         the entry has to leave the lookup hash table now. */
      useCount = atomic_read(&dentryP->d_count);
      TRACE2(TRACE_VNODE, 11, TRCID_INVAL_DUNCACHE,
             "removing dentry '%s': d_count %d",
             dentryP->d_name.name, useCount);
      if (useCount == 0)
      {
        /* Somebody did a dput after d_prune_aliases returned.  Abandon
           this pass and let d_prune_aliases handle the entry. */
        mustRetry = 1;
        break;
      }

      /* Unhash the entry directly.  d_invalidate would do the same, but
         calling it would mean dropping dcache_lock and restarting the
         traversal.  Its extra checks for subdirectories are not needed
         here: a deleted file or directory can no longer have any. */
      list_del_init(&dentryP->d_hash);
    }
    spin_unlock(&dcache_lock);
  } while (mustRetry);

  return nAliases;
}

/* The following function is like cxiInvalidateDCacheEntry() without the
 * d_prune_aliases() call.  This routine is called while holding the
 * cxiNode_t and hash table entry locks, so it must not prune:
 * d_prune_aliases() would make a dput call on every dentry and consequently
 * call gpfs_s_put_inode(), which would result in a deadlock.
 */
/* Interface summary for cxiInvalidateDCacheEntryNoPrune:
 *
 *   cnP  - node whose osNodeP field holds the struct inode to process.
 *
 * Returns the number of dentries on the inode's i_dentry list; each of
 * them has had its d_op pointer switched to gpfs_dops_invalid.  No dentry
 * is pruned or unhashed here.
 */
int
cxiInvalidateDCacheEntryNoPrune(cxiNode_t *cnP)
{
  struct inode *iP = (struct inode *)cnP->osNodeP;
  struct list_head *aliasHeadP = &iP->i_dentry;
  struct list_head *linkP;
  struct dentry *dentryP;
  int nMarked = 0;

  TRACE2(TRACE_VNODE, 11, TRCID_INVAL_DCACHE_NOPRUNE,
         "cxiInvalidateDCacheEntryNoPrune: iP 0x%lX i_ino %d",
         iP, iP->i_ino);

  /* Visit every dentry that refers to this file. */
  spin_lock(&dcache_lock);
  linkP = aliasHeadP->next;
  while (linkP != aliasHeadP)
  {
    nMarked++;

    /* Pointing d_op at gpfs_dops_invalid marks the entry invalid: that
       table's d_revalidate function always returns false. */
    dentryP = list_entry(linkP, struct dentry, d_alias);
    dentryP->d_op = &gpfs_dops_invalid;

    linkP = linkP->next;
  }
  spin_unlock(&dcache_lock);

  return nMarked;
}

/* The following function is called to remove invalid dcache entries for a
   file when the file is deleted on this node.
     Such invalid dcache entries occur when a file is renamed on another node
   before it is deleted here.  The rename revokes the inode token, which marks
   the dcache entry invalid, but does not remove it from the cache on this
   node.  When the file is deleted, the delete operation on this node will
   look up the file under its new name and turn the (new) dcache entry into a
   negative dcache entry, but since the file was renamed, it will not find or
   process the old, invalid dcache entry (the one referring to the old file
   name).  This function is called during delete (when the link count goes to
   zero) to remove old, invalid dcache entries, so the file can be destroyed.
     The function is similar to cxiInvalidateDCacheEntry, with the following
   differences: (1) it is only called on files that are being deleted (link
   count zero and destroyIfDelInode flag already set), (2) it does not mark
   any dcache entries as invalid; instead, it (3) only drops dcache entries
   that are already marked as invalid.  In particular, we do not want to
   invalidate the dcache entry referring to the current name being unlinked,
   because unlink will turn this into a valid, negative dcache entry. */
/* Interface summary for cxiDropInvalidDCacheEntries:
 *
 *   cnP  - node whose osNodeP field holds the struct inode to process;
 *          cnP->destroyIfDelInode must already be set (checked with
 *          DBGASSERT).
 *
 * No return value.  On return, every dentry of the inode that carried the
 * gpfs_dops_invalid table during the final traversal is out of the dcache
 * hash list.  Dentries with any other d_op table are left alone.
 */
void
cxiDropInvalidDCacheEntries(cxiNode_t *cnP)
{
  struct inode *iP = (struct inode *)cnP->osNodeP;
  struct list_head *aliasHeadP = &iP->i_dentry;
  struct list_head *linkP;
  struct dentry *dentryP;
  int useCount;
  int mustRetry;

  TRACE2(TRACE_VNODE, 11, TRCID_DROP_INVAL_DCACHE,
         "cxiDropInvalidDCacheEntries: iP 0x%lX i_ino %d",
         iP, iP->i_ino);

  DBGASSERT(cnP->destroyIfDelInode);

  do
  {
    /* Ask the kernel to discard unused aliases first; this is done
       before dcache_lock is taken. */
    mustRetry = 0;
    d_prune_aliases(iP);

    /* Walk whatever aliases survived the prune. */
    spin_lock(&dcache_lock);
    for (linkP = aliasHeadP->next; linkP != aliasHeadP; linkP = linkP->next)
    {
      dentryP = list_entry(linkP, struct dentry, d_alias);

      /* Only entries already marked invalid are of interest. */
      if (dentryP->d_op != &gpfs_dops_invalid)
        continue;

      /* Already unhashed: nothing left to drop. */
      if (list_empty(&dentryP->d_hash))
        continue;

      /* A hashed entry is expected to be in use (non-zero d_count),
         since d_prune_aliases would otherwise have removed it.  If the
         count dropped to zero in the meantime, abandon this pass and let
         d_prune_aliases handle the entry. */
      useCount = atomic_read(&dentryP->d_count);
      TRACE2(TRACE_VNODE, 11, TRCID_DROP_DUNCACHE,
             "removing dentry '%s': d_count %d",
             dentryP->d_name.name, useCount);
      if (useCount == 0)
      {
        mustRetry = 1;
        break;
      }

      /* Take the entry out of the hash table so lookups no longer find
         it. */
      list_del_init(&dentryP->d_hash);
    }
    spin_unlock(&dcache_lock);
  } while (mustRetry);
}

/* The following function is called to invalidate negative dcache entries for
   all files in a directory when we lose the BR token for the directory. */
/* Interface summary for cxiInvalidateNegDCacheEntries:
 *
 *   cnP  - node whose osNodeP field holds the struct inode of the
 *          directory to process.
 *
 * Returns the number of dentries on the directory inode's i_dentry list
 * (not the number of children that were marked invalid).
 */
int
cxiInvalidateNegDCacheEntries(cxiNode_t *cnP)
{
  struct inode *iP = (struct inode *)cnP->osNodeP;
  struct list_head *aliasHeadP = &iP->i_dentry;
  struct list_head *aliasLinkP;
  struct list_head *kidHeadP, *kidLinkP;
  struct dentry *dirDentryP, *kidP;
  int nAliases = 0;

  TRACE2(TRACE_VNODE, 11, TRCID_INVAL_NEG_DCACHE,
         "cxiInvalidateNegDCacheEntries: iP 0x%lX i_ino %d",
         iP, iP->i_ino);

  /* Outer walk: dentries referring to the directory itself.  Hard links
     to directories are not supported, so exactly one dentry is expected
     on this list. */
  spin_lock(&dcache_lock);
  for (aliasLinkP = aliasHeadP->next;
       aliasLinkP != aliasHeadP;
       aliasLinkP = aliasLinkP->next)
  {
    nAliases++;

    /* Inner walk: all children of this directory dentry. */
    dirDentryP = list_entry(aliasLinkP, struct dentry, d_alias);
    kidHeadP = &dirDentryP->d_subdirs;
    for (kidLinkP = kidHeadP->next;
         kidLinkP != kidHeadP;
         kidLinkP = kidLinkP->next)
    {
      kidP = list_entry(kidLinkP, struct dentry, d_child);

      /* Leave positive entries alone, unless an unlink is pending on
         them.  Such entries still have a non-null d_inode but are tagged
         with the gpfs_dops_ddeletepending table (see also the comment in
         gpfs_i_unlink). */
      if (kidP->d_inode && kidP->d_op != &gpfs_dops_ddeletepending)
        continue;

      /* Negative entry (d_inode is NULL) or delete-pending entry: mark
         it invalid by pointing d_op at gpfs_dops_invalid, whose
         d_revalidate function always returns false. */
      kidP->d_op = &gpfs_dops_invalid;
    }
  }
  spin_unlock(&dcache_lock);

  return nAliases;
}

/* dentry_operations */

/* The d_revalidate function is expected to check whether the directory entry
 * cached in the given dentry struct is still valid.  
 */
/* gpfs_d_invalid: a revalidation routine that never succeeds.
 *
 *   dentry - dcache entry being checked; only read for tracing.
 *   flags  - caller-supplied flags; only traced.
 *
 * Traces its arguments and unconditionally returns false, i.e. it reports
 * the entry as not valid.  Presumably this is the d_revalidate member of
 * the gpfs_dops_invalid table; that table is not defined in this file.
 */
int
gpfs_d_invalid(struct dentry *dentry, int flags)
{
  int stillValid = false;   /* the answer is always "not valid" */

  TRACE4(TRACE_VNODE, 2, TRCID_DIR_001,
         "gpfs_d_invalid: dentry 0x%lX flags 0x%X: "
           "d_inode 0x%lX (name '%s') is invalid",
         dentry, flags, dentry->d_inode, dentry->d_name.name);

  return stillValid;
}

